Load the library

Clean the data

meteorite_data <- read.csv("data/meteorite-landings.csv")

meteorite_data <- meteorite_data %>% 
  clean_names() %>%
  filter(year >= 1970 & year <= 2013) %>% 
  select(-fall)

meteorite_data_cleaned <- meteorite_data %>%
  filter(!is.na(reclat) & !is.na(reclong)) %>%
  filter(!is.na(mass) & !is.na(year))

summary(meteorite_data_cleaned)
##      name                 id          nametype           recclass        
##  Length:35901       Min.   :    4   Length:35901       Length:35901      
##  Class :character   1st Qu.:11000   Class :character   Class :character  
##  Mode  :character   Median :22209   Mode  :character   Mode  :character  
##                     Mean   :25989                                        
##                     3rd Qu.:40608                                        
##                     Max.   :57458                                        
##       mass              year          reclat          reclong       
##  Min.   :      0   Min.   :1970   Min.   :-87.37   Min.   :-165.43  
##  1st Qu.:      6   1st Qu.:1987   1st Qu.:-76.72   1st Qu.:   0.00  
##  Median :     25   Median :1997   Median :-71.50   Median :  53.93  
##  Mean   :   1483   Mean   :1995   Mean   :-43.60   Mean   :  66.10  
##  3rd Qu.:    133   3rd Qu.:2003   3rd Qu.:  0.00   3rd Qu.: 157.22  
##  Max.   :4000000   Max.   :2013   Max.   : 81.17   Max.   : 178.20  
##  geo_location      
##  Length:35901      
##  Class :character  
##  Mode  :character  
##                    
##                    
## 

Devided the location based on different continents

meteorite_sf <- st_as_sf(meteorite_data_cleaned, coords = c("reclong", "reclat"), crs = 4326, remove = FALSE)

continents <- ne_countries(scale = "medium", returnclass = "sf") %>%
  group_by(continent) %>%
  summarise()

meteorite_data_with_continents <- st_join(meteorite_sf, continents["continent"], join = st_intersects)

meteorite_data_with_continents <- meteorite_data_with_continents %>%
  mutate(continent = replace_na(continent, "Ocean"))

print(meteorite_data_with_continents)
## Simple feature collection with 35901 features and 10 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -165.4333 ymin: -87.36667 xmax: 178.2 ymax: 81.16667
## Geodetic CRS:  WGS 84
## First 10 features:
##                name    id nametype      recclass  mass year   reclat   reclong
## 1          Acapulco    10    Valid   Acapulcoite  1914 1976 16.88333 -99.90000
## 2  Aioun el Atrouss   423    Valid  Diogenite-pm  1000 1974 16.39806  -9.57028
## 3           Akyumak   433    Valid     Iron, IVA 50000 1981 39.91667  42.81667
## 4         Al Zarnkh   447    Valid           LL5   700 2001 13.66033  28.96000
## 5   Alby sur Chéran   458    Valid Eucrite-mmict   252 2002 45.82133   6.01533
## 6    Almahata Sitta 48915    Valid   Ureilite-an  3950 2008 20.74575  32.41275
## 7        Alta'ameem  2284    Valid           LL5  6000 1977 35.27333  44.21556
## 8            Anlong  2305    Valid            H5  2500 1971 25.15000 105.18333
## 9            Aomori  2313    Valid            L6   320 1984 40.81056 140.78556
## 10        Ash Creek 48954    Valid            L6  9500 2009 31.80500 -97.01000
##               geo_location     continent                  geometry
## 1  (16.883330, -99.900000) North America    POINT (-99.9 16.88333)
## 2   (16.398060, -9.570280)        Africa POINT (-9.57028 16.39806)
## 3   (39.916670, 42.816670)          Asia POINT (42.81667 39.91667)
## 4   (13.660330, 28.960000)        Africa    POINT (28.96 13.66033)
## 5    (45.821330, 6.015330)        Europe  POINT (6.01533 45.82133)
## 6   (20.745750, 32.412750)        Africa POINT (32.41275 20.74575)
## 7   (35.273330, 44.215560)          Asia POINT (44.21556 35.27333)
## 8  (25.150000, 105.183330)          Asia    POINT (105.1833 25.15)
## 9  (40.810560, 140.785560)          Asia POINT (140.7856 40.81056)
## 10 (31.805000, -97.010000) North America     POINT (-97.01 31.805)

Make the location based on the map

continents <- unique(meteorite_data_with_continents %>% pull(continent))
color_palette <- setNames(brewer.pal(length(continents), "Set1"), continents)
map <- plot_ly(type = 'scattermapbox', mode = 'markers')
for (cont in continents) {
  continent_data <- subset(meteorite_data_with_continents, continent == cont)
  map <- map %>%
    add_trace(
      data = continent_data,
      lat = ~reclat,
      lon = ~reclong,
      marker = list(size = 4, color = color_palette[cont]),
      text = ~paste("Name:", name, "<br>Year:", year, "<br>Mass:", mass, "<br>Continent:", continent),
      name = cont
    )
}
map <- map %>%
  layout(
    title = 'Global Distribution of Meteorite Landings by Continent (1970-2013)',
    mapbox = list(
      style = "open-street-map",
      zoom = 1,
      center = list(lat = 0, lon = 0)
    ),
    legend = list(title = list(text = "Continent"))
  )
map

Line plot based on the year and continents

yearly_landings <- meteorite_data_with_continents %>%
  st_drop_geometry() %>%
  group_by(year, continent) %>%
  summarise(count = n(), .groups = 'drop')
line_plot_by_continent <- plot_ly(
  data = yearly_landings,
  x = ~year,
  y = ~count,
  type = 'scatter',
  mode = 'lines',
  color = ~continent
) %>%
  layout(
    title = 'Meteorite Landings Over Time by Continent',
    xaxis = list(title = 'Year'),
    yaxis = list(title = 'Number of Meteorite Landings'),
    legend = list(title = list(text = 'Continent'))
  )
line_plot_by_continent

Welch’s ANOVA test

meteorite_data_with_continents <- meteorite_data_with_continents %>%
  mutate(continent = as.factor(continent))

levene_test <- leveneTest(mass ~ continent, data = meteorite_data_with_continents)
print(levene_test)
## Levene's Test for Homogeneity of Variance (center = median)
##          Df F value    Pr(>F)    
## group     7   24.75 < 2.2e-16 ***
##       35893                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The result of Levene’s Test shows a p-value of \(1.85 \times 10^{-11}\), which is far below 0.05. This indicates that the variances of mass across different continents are not equal. Given this result, we should proceed with Welch’s ANOVA instead of the standard ANOVA, as Welch’s ANOVA does not assume equal variances.

Null and Alternative Hypotheses \[ H_0: \mu_{\text{Africa}} = \mu_{\text{Asia}} = \mu_{\text{Europe}} = \mu_{\text{North America}} = \mu_{\text{South America}} = \mu_{\text{Oceania}} = \mu_{\text{Antarctica}}\] \[H_1: \text{At least one continent has a different mean meteorite mass.}\] Rejection Rule: Reject the null hypothesis if the p-value is less than the significance level.

\[\text{Reject } H_0 \text{ if } p\text{-value} < \alpha = 0.05.\]

welch_anova <- oneway.test(mass ~ continent, data = meteorite_data_with_continents, var.equal = FALSE)
print(welch_anova)
## 
##  One-way analysis of means (not assuming equal variances)
## 
## data:  mass and continent
## F = 10.461, num df = 7.0, denom df = 1417.5, p-value = 7.261e-13

\[F = 9.2968, \quad \text{df}_1 = 7, \quad \text{df}_2 = 2529, \quad p\text{-value} = 2.062 \times 10^{-11}\]

\[\text{Since } p\text{-value} < 0.05, \text{ we reject } H_0.\] Conclusion: There is a statistically significant difference in mean meteorite mass across continents.

summary_table <- meteorite_data_with_continents %>%
  group_by(continent) %>%   
  summarize(count = n()) %>%        
  arrange(continent)
area_data <- data.frame(
  continent = c("Africa", "Antarctica", "Asia", "Europe", 
                "North America", "South America", "Oceania"),
  area_km2 = c(29648481, 13720000, 31033131, 22134710, 
               21330000, 17461112, 8486460)
)

summary_table <- meteorite_data_with_continents %>%
  group_by(continent) %>%
  summarize(count = n()) %>%
  arrange(continent)

merged_table <- merge(summary_table, area_data, by = "continent")

merged_table <- merged_table %>%
  mutate(landings_per_km2 = count / area_km2)

final_table <- merged_table %>%
  select(continent, count, area_km2, landings_per_km2) %>%
  knitr::kable(digits = c(0, 0, 0, 6), caption = "Meteorite Landings by Continent")

final_table
Meteorite Landings by Continent
continent count area_km2 landings_per_km2 geometry
Africa 2690 29648481 0.000091 MULTIPOINT ((-14.71633 24.6…
Antarctica 22075 13720000 0.001609 MULTIPOINT ((-125 -84.75), …
Asia 3139 31033131 0.000101 MULTIPOINT ((27.32997 37.35…
Europe 125 22134710 0.000006 MULTIPOINT ((-8.28 37.60833…
North America 816 21330000 0.000038 MULTIPOINT ((-64.43333 48.8…
Oceania 436 8486460 0.000051 MULTIPOINT ((117.9667 -31.5…
South America 413 17461112 0.000024 MULTIPOINT ((-41.73356 -20….

Possible Reasons for Differences in Mean Meteorite Mass Across Continents

Variations in Land Area: Different continents have different land areas, which may affect the probability of meteorites being found and recorded. Larger continents may have more land to catch falling meteorites, resulting in larger finds and potentially heavier meteorites being discovered.

Data Quality and Collection Bias: Differences in meteorite mass across continents may also result from data quality and collection bias. Factors such as population density, exploration frequency, geological and environmental conditions, and scientific interest can all impact the likelihood of discovering and recording meteorites.

Prediction

set.seed(123)

clusters <- kmeans(meteorite_data_cleaned[, c("reclat", "reclong")], centers = 500)

cluster_centers <- data.frame(
  reclat = clusters$centers[, 1],
  reclong = clusters$centers[, 2]
)

set.seed(123)
predicted_locations <- data.frame(
  reclat = rnorm(5000, mean = rep(cluster_centers$reclat, each = 10), sd = 1),
  reclong = rnorm(5000, mean = rep(cluster_centers$reclong, each = 10), sd = 1)
)

fig <- plot_ly(
  data = predicted_locations,
  lat = ~reclat,
  lon = ~reclong,
  type = "scattergeo",
  mode = "markers",
  marker = list(size = 3, color = "blue", opacity = 0.7),
  text = ~paste0("Lat: ", round(reclat, 2), "<br>Lon: ", round(reclong, 2))
) %>%
  layout(
    title = "Enhanced Predicted Meteorite Landings",
    geo = list(
      showland = TRUE,
      showcountries = TRUE,
      projection = list(type = 'natural earth')
    )
  )

fig